# Read the CDI data, convert the column names with janitor::clean_names(),
# and add crime_rate = crimes / pop * 1000.
cdi <- mutate(
  janitor::clean_names(read_csv("./cdi.csv")),
  crime_rate = crimes / pop * 1000
)

Normal?

Note that the distributions shown in the plots are not normal; they are all skewed.

Marginal Distribution

Total personal income

#shapiro.test(cdi$totalinc)

# Crime rate against total personal income: scatter plot with the fitted
# least-squares line and its 95% confidence band
ggplot(cdi, aes(x = totalinc, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Total personal income", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
reg_totalinc <- lm(crime_rate ~ totalinc, data = cdi)
summary(reg_totalinc)
## 
## Call:
## lm(formula = crime_rate ~ totalinc, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.901 -18.609  -4.135  15.033 223.801 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 5.348e+01  1.488e+00  35.929  < 2e-16 ***
## totalinc    4.839e-04  9.867e-05   4.904 1.32e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.64 on 438 degrees of freedom
## Multiple R-squared:  0.05206,    Adjusted R-squared:  0.04989 
## F-statistic: 24.05 on 1 and 438 DF,  p-value: 1.324e-06

There is a significant positive linear relationship between crime rate and total personal income.

Per capita income

# Crime rate against per capita income: scatter plot with the fitted
# least-squares line and its 95% confidence band
ggplot(cdi, aes(x = pcincome, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Per capita income", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
reg_pcincome <- lm(crime_rate ~ pcincome, data = cdi)
summary(reg_pcincome)
## 
## Call:
## lm(formula = crime_rate ~ pcincome, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -52.506 -18.584  -3.998  14.791 237.750 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 67.3138796  6.0919784  11.050   <2e-16 ***
## pcincome    -0.0005402  0.0003206  -1.685   0.0927 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.27 on 438 degrees of freedom
## Multiple R-squared:  0.006439,   Adjusted R-squared:  0.004171 
## F-statistic: 2.839 on 1 and 438 DF,  p-value: 0.09274

There is NO significant linear relationship between crime rate and per capita income.

Percent unemployment

# Crime rate against percent unemployment: scatter plot with the fitted
# least-squares line and its 95% confidence band
ggplot(cdi, aes(x = unemp, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent unemployment", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
reg_unemp <- lm(crime_rate ~ unemp, data = cdi)
summary(reg_unemp)
## 
## Call:
## lm(formula = crime_rate ~ unemp, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -52.295 -19.243  -4.865  15.409 237.280 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  54.0598     3.9049  13.844   <2e-16 ***
## unemp         0.4891     0.5580   0.877    0.381    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.33 on 438 degrees of freedom
## Multiple R-squared:  0.001751,   Adjusted R-squared:  -0.000528 
## F-statistic: 0.7683 on 1 and 438 DF,  p-value: 0.3812

There is NO significant linear relationship between crime rate and percent unemployment.

Percent below poverty level

# Crime rate against percent below poverty level: scatter plot with the
# fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = poverty, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent below poverty level", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
reg_poverty <- lm(crime_rate ~ poverty, data = cdi)
summary(reg_poverty)
## 
## Call:
## lm(formula = crime_rate ~ poverty, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -64.008 -14.578  -2.561  13.605 208.853 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  33.1390     2.4435   13.56   <2e-16 ***
## poverty       2.7690     0.2472   11.20   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.12 on 438 degrees of freedom
## Multiple R-squared:  0.2226, Adjusted R-squared:  0.2209 
## F-statistic: 125.4 on 1 and 438 DF,  p-value: < 2.2e-16

There is a significant positive linear relationship between crime rate and percent below poverty level.

Percent bachelor’s degrees

# Crime rate against percent bachelor's degrees: scatter plot with the
# fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = bagrad, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent bachelor’s degrees", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
reg_bagrad <- lm(crime_rate ~ bagrad, data = cdi)
summary(reg_bagrad)
## 
## Call:
## lm(formula = crime_rate ~ bagrad, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -52.264 -19.407  -4.478  15.727 239.313 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  54.4035     3.8226  14.232   <2e-16 ***
## bagrad        0.1368     0.1705   0.802    0.423    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.34 on 438 degrees of freedom
## Multiple R-squared:  0.001467,   Adjusted R-squared:  -0.0008125 
## F-statistic: 0.6436 on 1 and 438 DF,  p-value: 0.4228

There is NO significant linear relationship between crime rate and percent bachelor’s degrees.

Percent high school graduates

# Crime rate against percent high school graduates: scatter plot with the
# fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = hsgrad, y = crime_rate)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent high school graduates", y = "Crimes rate") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
reg_hsgrad <- lm(crime_rate ~ hsgrad, data = cdi)
summary(reg_hsgrad)
## 
## Call:
## lm(formula = crime_rate ~ hsgrad, data = cdi)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -54.07 -18.46  -3.64  16.37 226.47 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 125.6947    14.1191   8.902  < 2e-16 ***
## hsgrad       -0.8820     0.1813  -4.865  1.6e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 26.65 on 438 degrees of freedom
## Multiple R-squared:  0.05126,    Adjusted R-squared:  0.0491 
## F-statistic: 23.67 on 1 and 438 DF,  p-value: 1.601e-06

There is a significant negative linear relationship between crime rate and percent high school graduates.

Summary

  • There is no significant linear relationship between crime rate and per capita income, percent unemployment, or percent bachelor’s degrees.

We then want to examine whether there is a linear relationship between each pair of variables.

Pairwise comparison

Percent high school graduates v.s. Total personal income

# Total personal income against percent high school graduates: scatter plot
# with the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = hsgrad, y = totalinc)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent high school graduates", y = "Total personal income") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
hs_to <- lm(totalinc ~ hsgrad, data = cdi)
summary(hs_to)
## 
## Call:
## lm(formula = totalinc ~ hsgrad, data = cdi)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -7284  -5452  -4023    530 176963 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  1693.20    6827.87   0.248    0.804
## hsgrad         79.63      87.68   0.908    0.364
## 
## Residual standard error: 12890 on 438 degrees of freedom
## Multiple R-squared:  0.00188,    Adjusted R-squared:  -0.0003991 
## F-statistic: 0.8249 on 1 and 438 DF,  p-value: 0.3643

There is NO significant linear relationship between total personal income and percent high school graduates.

Poverty v.s. Percent high school graduates

# Percent high school graduates against percent below poverty level:
# scatter plot with the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = poverty, y = hsgrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent below poverty level", y = "Percent high school graduates") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
po_hs <- lm(hsgrad ~ poverty, data = cdi)
summary(po_hs)
## 
## Call:
## lm(formula = hsgrad ~ poverty, data = cdi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.8748  -2.7290   0.1789   3.4117  12.2977 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 86.64842    0.51375  168.66   <2e-16 ***
## poverty     -1.04209    0.05198  -20.05   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.072 on 438 degrees of freedom
## Multiple R-squared:  0.4785, Adjusted R-squared:  0.4773 
## F-statistic: 401.9 on 1 and 438 DF,  p-value: < 2.2e-16

There is a significant negative linear relationship between percent below poverty level and percent high school graduates.

Poverty v.s. Total personal income

# Total personal income against percent below poverty level: scatter plot
# with the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = poverty, y = totalinc)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent below poverty level", y = "Total personal income") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
po_to <- lm(totalinc ~ poverty, data = cdi)
summary(po_to)
## 
## Call:
## lm(formula = totalinc ~ poverty, data = cdi)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6774  -5513  -4047    725 176669 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   8804.0     1305.7   6.743 4.92e-11 ***
## poverty       -107.2      132.1  -0.811    0.418    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 12890 on 438 degrees of freedom
## Multiple R-squared:  0.001501,   Adjusted R-squared:  -0.0007789 
## F-statistic: 0.6583 on 1 and 438 DF,  p-value: 0.4176

There is NO significant linear relationship between total personal income and percent below poverty level.

With other variables

Percent unemployment v.s. Percent high school graduates

# Percent high school graduates against percent unemployment: scatter plot
# with the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = unemp, y = hsgrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent unemployment", y = "Percent high school graduates") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
hs_un <- lm(hsgrad ~ unemp, data = cdi)
summary(hs_un)
## 
## Call:
## lm(formula = hsgrad ~ unemp, data = cdi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -24.0550  -3.1145   0.6782   3.8344  18.8605 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  89.3101     0.8074  110.61   <2e-16 ***
## unemp        -1.7811     0.1154  -15.44   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.652 on 438 degrees of freedom
## Multiple R-squared:  0.3524, Adjusted R-squared:  0.3509 
## F-statistic: 238.3 on 1 and 438 DF,  p-value: < 2.2e-16

There is a significant negative linear relationship between percent unemployment and percent high school graduates.

Per capita income v.s. Percent high school graduates

# Percent high school graduates against per capita income: scatter plot
# with the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = pcincome, y = hsgrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Per capita income", y = "Percent high school graduates") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
hs_pc <- lm(hsgrad ~ pcincome, data = cdi)
summary(hs_pc)
## 
## Call:
## lm(formula = hsgrad ~ pcincome, data = cdi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.2272  -2.8779  -0.1909   3.8914  17.0099 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 6.078e+01  1.337e+00   45.45   <2e-16 ***
## pcincome    9.038e-04  7.038e-05   12.84   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.986 on 438 degrees of freedom
## Multiple R-squared:  0.2735, Adjusted R-squared:  0.2719 
## F-statistic: 164.9 on 1 and 438 DF,  p-value: < 2.2e-16

There is a significant positive linear relationship between per capita income and percent high school graduates.

Percent bachelor’s degrees v.s. Percent high school graduates

# Percent bachelor's degrees against percent high school graduates: scatter
# plot with the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = hsgrad, y = bagrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent high school graduates", y = "Percent bachelor’s degrees") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
ba_hs <- lm(bagrad ~ hsgrad, data = cdi)
summary(ba_hs)
## 
## Call:
## lm(formula = bagrad ~ hsgrad, data = cdi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.8819  -4.0177  -0.7579   3.3907  23.5428 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -38.81857    2.86825  -13.53   <2e-16 ***
## hsgrad        0.77229    0.03683   20.97   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.414 on 438 degrees of freedom
## Multiple R-squared:  0.501,  Adjusted R-squared:  0.4998 
## F-statistic: 439.7 on 1 and 438 DF,  p-value: < 2.2e-16

There is a significant positive linear relationship between percent bachelor’s degrees and percent high school graduates.

Total personal income v.s. Percent bachelor’s degrees

# Percent bachelor's degrees against total personal income: scatter plot
# with the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = totalinc, y = bagrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Total personal income", y = "Percent bachelor’s degrees") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
ba_to <- lm(bagrad ~ totalinc, data = cdi)
summary(ba_to)
## 
## Call:
## lm(formula = bagrad ~ totalinc, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.065  -5.559  -1.339   4.034  31.575 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.004e+01  4.175e-01   48.00  < 2e-16 ***
## totalinc    1.320e-04  2.768e-05    4.77 2.51e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.472 on 438 degrees of freedom
## Multiple R-squared:  0.04939,    Adjusted R-squared:  0.04722 
## F-statistic: 22.75 on 1 and 438 DF,  p-value: 2.511e-06

There is a significant positive linear relationship between percent bachelor’s degrees and total personal income.

Total personal income v.s. Per capita income

# Per capita income against total personal income: scatter plot with the
# fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = totalinc, y = pcincome)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Total personal income", y = "Per capita income") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
to_pc <- lm(pcincome ~ totalinc, data = cdi)
summary(to_pc)
## 
## Call:
## lm(formula = pcincome ~ totalinc, data = cdi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17093.5  -2115.3   -683.8   1559.1  18895.3 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.770e+04  2.129e+02  83.129  < 2e-16 ***
## totalinc    1.095e-01  1.411e-02   7.761 6.01e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3810 on 438 degrees of freedom
## Multiple R-squared:  0.1209, Adjusted R-squared:  0.1189 
## F-statistic: 60.23 on 1 and 438 DF,  p-value: 6.014e-14

There is a significant positive linear relationship between per capita income and total personal income.

Total personal income v.s. percent unemployment

# Percent unemployment against total personal income: scatter plot with the
# fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = totalinc, y = unemp)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Total personal income", y = "Percent unemployment") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
to_un <- lm(unemp ~ totalinc, data = cdi)
summary(to_un)
## 
## Call:
## lm(formula = unemp ~ totalinc, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4216 -1.4263 -0.3943  0.8775 14.6648 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.645e+00  1.307e-01  50.836   <2e-16 ***
## totalinc    -6.147e-06  8.665e-06  -0.709    0.478    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.339 on 438 degrees of freedom
## Multiple R-squared:  0.001148,   Adjusted R-squared:  -0.001133 
## F-statistic: 0.5032 on 1 and 438 DF,  p-value: 0.4785

There is NO significant linear relationship between total personal income and percent unemployment.

Per capita income v.s. percent unemployment

# Percent unemployment against per capita income: scatter plot with the
# fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = pcincome, y = unemp)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Per capita income", y = "Percent unemployment") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
pc_un <- lm(unemp ~ pcincome, data = cdi)
summary(pc_un)
## 
## Call:
## lm(formula = unemp ~ pcincome, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5349 -1.3458 -0.3355  0.9244 13.9541 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.004e+01  4.950e-01  20.284  < 2e-16 ***
## pcincome    -1.855e-04  2.605e-05  -7.122  4.4e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.216 on 438 degrees of freedom
## Multiple R-squared:  0.1038, Adjusted R-squared:  0.1017 
## F-statistic: 50.72 on 1 and 438 DF,  p-value: 4.404e-12

There is a significant negative linear relationship between per capita income and percent unemployment.

Per capita income v.s. percent below poverty level

# Percent below poverty level against per capita income: scatter plot with
# the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = pcincome, y = poverty)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Per capita income", y = "Percent below poverty level") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
pc_po <- lm(poverty ~ pcincome, data = cdi)
summary(pc_po)
## 
## Call:
## lm(formula = poverty ~ pcincome, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.8943 -2.6019 -0.6274  1.6855 20.9093 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.153e+01  8.318e-01   25.89   <2e-16 ***
## pcincome    -6.903e-04  4.378e-05  -15.77   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.724 on 438 degrees of freedom
## Multiple R-squared:  0.3621, Adjusted R-squared:  0.3606 
## F-statistic: 248.6 on 1 and 438 DF,  p-value: < 2.2e-16

There is a significant negative linear relationship between per capita income and percent below poverty level.

Per capita income v.s. Percent bachelor’s degrees

# Percent bachelor's degrees against per capita income: scatter plot with
# the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = pcincome, y = bagrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Per capita income", y = "Percent bachelor’s degrees") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
ba_pc <- lm(bagrad ~ pcincome, data = cdi)
summary(ba_pc)
## 
## Call:
## lm(formula = bagrad ~ pcincome, data = cdi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.0102  -3.8563  -0.8948   2.8291  22.4822 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.258e+00  1.230e+00  -2.648  0.00839 ** 
## pcincome     1.311e-03  6.475e-05  20.250  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.507 on 438 degrees of freedom
## Multiple R-squared:  0.4835, Adjusted R-squared:  0.4823 
## F-statistic: 410.1 on 1 and 438 DF,  p-value: < 2.2e-16

There is a significant positive linear relationship between per capita income and percent bachelor’s degrees.

Percent unemployment v.s. percent below poverty level

# Percent below poverty level against percent unemployment: scatter plot
# with the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = unemp, y = poverty)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent unemployment", y = "Percent below poverty level") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
po_un <- lm(poverty ~ unemp, data = cdi)
summary(po_un)
## 
## Call:
## lm(formula = poverty ~ unemp, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.1606 -2.9857 -0.4486  2.0124 21.5913 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.97952    0.59906   4.974 9.45e-07 ***
## unemp        0.87032    0.08561  10.166  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.193 on 438 degrees of freedom
## Multiple R-squared:  0.1909, Adjusted R-squared:  0.1891 
## F-statistic: 103.4 on 1 and 438 DF,  p-value: < 2.2e-16

There is a significant positive linear relationship between percent unemployment and percent below poverty level.

Percent unemployment v.s. percent bachelor’s degrees

# Percent bachelor's degrees against percent unemployment: scatter plot
# with the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = unemp, y = bagrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent unemployment", y = "Percent bachelor's degrees") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
ba_un <- lm(bagrad ~ unemp, data = cdi)
summary(ba_un)
## 
## Call:
## lm(formula = bagrad ~ unemp, data = cdi)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.163  -4.484  -1.163   3.616  25.912 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  32.7635     0.9208   35.58   <2e-16 ***
## unemp        -1.7710     0.1316  -13.46   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.445 on 438 degrees of freedom
## Multiple R-squared:  0.2926, Adjusted R-squared:  0.291 
## F-statistic: 181.2 on 1 and 438 DF,  p-value: < 2.2e-16

There is a significant negative linear relationship between percent unemployment and percent bachelor’s degrees.

Percent below poverty level v.s. percent bachelor’s degrees

# Percent bachelor's degrees against percent below poverty level: scatter
# plot with the fitted least-squares line and its 95% confidence band
ggplot(cdi, aes(x = poverty, y = bagrad)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(x = "Percent below poverty level", y = "Percent bachelor's degrees") +
  theme_bw(base_size = 20)
## `geom_smooth()` using formula 'y ~ x'

# Fit the simple linear regression and print its summary
ba_po <- lm(bagrad ~ poverty, data = cdi)
summary(ba_po)
## 
## Call:
## lm(formula = bagrad ~ poverty, data = cdi)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.9077  -5.1219  -0.5845   3.8046  28.2510 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 26.93575    0.70858  38.014   <2e-16 ***
## poverty     -0.67135    0.07169  -9.364   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.995 on 438 degrees of freedom
## Multiple R-squared:  0.1668, Adjusted R-squared:  0.1649 
## F-statistic: 87.69 on 1 and 438 DF,  p-value: < 2.2e-16

There is a significant negative linear relationship between percent below poverty level and percent bachelor’s degrees.

Summary

  • Crime rate has a significant linear relationship with total personal income (+), percent below poverty level (+), and percent high school graduates (-).

  • Total personal income has a significant linear relationship with percent bachelor’s degrees (+) and per capita income (+).

  • Percent below poverty level has a significant linear relationship with percent high school graduates (-), percent bachelor’s degrees (-), per capita income (-), and percent unemployment (+).

  • Percent high school graduates has a significant linear relationship with percent below poverty level (-), percent unemployment (-), percent bachelor’s degrees (+), and per capita income (+).